library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages -------------------------------------------------------------------- tidyverse 1.3.1 --
√ ggplot2 3.3.5 √ purrr 0.3.4
√ tibble 3.1.6 √ dplyr 1.0.8
√ tidyr 1.2.0 √ stringr 1.4.0
√ readr 2.1.2 √ forcats 0.5.1
-- Conflicts ----------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(readxl)
Warning: package ‘readxl’ was built under R version 4.1.3
# Load the three raw survey workbooks, one data frame per survey year.
# The paths are relative, so they resolve against the current working directory.
# The 2017 sheet contains one column with a blank header; readxl reports it
# and auto-names it `...114`.
candy_2015 <- read_excel(path = "../raw_data/boing-boing-candy-2015.xlsx")
candy_2016 <- read_excel(path = "../raw_data/boing-boing-candy-2016.xlsx")
candy_2017 <- read_excel(path = "../raw_data/boing-boing-candy-2017.xlsx")
New names:
* `` -> ...114
# Print the project root directory that the here package resolves.
here::here()
[1] "C:/Users/mahri/OneDrive/CodeClan/dirty_data_project/dirty_data_codeclan_project_mahri/dirty_data_task_4_mahri"
From glimpse
# First look at the raw data: the top rows of 2015 and a column-by-column
# overview of 2016.
candy_2015 %>% head()
candy_2016 %>% glimpse()
Rows: 1,259
Columns: 123
$ Timestamp <dttm> ~
$ `Are you going actually going trick or treating yourself?` <chr> ~
$ `Your gender:` <chr> ~
$ `How old are you?` <chr> ~
$ `Which country do you live in?` <chr> ~
$ `Which state, province, county do you live in?` <chr> ~
$ `[100 Grand Bar]` <chr> ~
$ `[Anonymous brown globs that come in black and orange wrappers]` <chr> ~
$ `[Any full-sized candy bar]` <chr> ~
$ `[Black Jacks]` <chr> ~
$ `[Bonkers (the candy)]` <chr> ~
$ `[Bonkers (the board game)]` <chr> ~
$ `[Bottle Caps]` <chr> ~
$ `[Box'o'Raisins]` <chr> ~
$ `[Broken glow stick]` <chr> ~
$ `[Butterfinger]` <chr> ~
$ `[Cadbury Creme Eggs]` <chr> ~
$ `[Candy Corn]` <chr> ~
$ `[Candy that is clearly just the stuff given out for free at restaurants]` <chr> ~
$ `[Caramellos]` <chr> ~
$ `[Cash, or other forms of legal tender]` <chr> ~
$ `[Chardonnay]` <chr> ~
$ `[Chick-o-Sticks (we don’t know what that is)]` <chr> ~
$ `[Chiclets]` <chr> ~
$ `[Coffee Crisp]` <chr> ~
$ `[Creepy Religious comics/Chick Tracts]` <chr> ~
$ `[Dental paraphenalia]` <chr> ~
$ `[Dots]` <chr> ~
$ `[Dove Bars]` <chr> ~
$ `[Fuzzy Peaches]` <chr> ~
$ `[Generic Brand Acetaminophen]` <chr> ~
$ `[Glow sticks]` <chr> ~
$ `[Goo Goo Clusters]` <chr> ~
$ `[Good N' Plenty]` <chr> ~
$ `[Gum from baseball cards]` <chr> ~
$ `[Gummy Bears straight up]` <chr> ~
$ `[Hard Candy]` <chr> ~
$ `[Healthy Fruit]` <chr> ~
$ `[Heath Bar]` <chr> ~
$ `[Hershey's Dark Chocolate]` <chr> ~
$ `[Hershey’s Milk Chocolate]` <chr> ~
$ `[Hershey's Kisses]` <chr> ~
$ `[Hugs (actual physical hugs)]` <chr> ~
$ `[Jolly Rancher (bad flavor)]` <chr> ~
$ `[Jolly Ranchers (good flavor)]` <chr> ~
$ `[JoyJoy (Mit Iodine!)]` <chr> ~
$ `[Junior Mints]` <chr> ~
$ `[Senior Mints]` <chr> ~
$ `[Kale smoothie]` <chr> ~
$ `[Kinder Happy Hippo]` <chr> ~
$ `[Kit Kat]` <chr> ~
$ `[LaffyTaffy]` <chr> ~
$ `[LemonHeads]` <chr> ~
$ `[Licorice (not black)]` <chr> ~
$ `[Licorice (yes black)]` <chr> ~
$ `[Lindt Truffle]` <chr> ~
$ `[Lollipops]` <chr> ~
$ `[Mars]` <chr> ~
$ `[Mary Janes]` <chr> ~
$ `[Maynards]` <chr> ~
$ `[Mike and Ike]` <chr> ~
$ `[Milk Duds]` <chr> ~
$ `[Milky Way]` <chr> ~
$ `[Regular M&Ms]` <chr> ~
$ `[Peanut M&M’s]` <chr> ~
$ `[Blue M&M's]` <chr> ~
$ `[Red M&M's]` <chr> ~
$ `[Third Party M&M's]` <chr> ~
$ `[Minibags of chips]` <chr> ~
$ `[Mint Kisses]` <chr> ~
$ `[Mint Juleps]` <chr> ~
$ `[Mr. Goodbar]` <chr> ~
$ `[Necco Wafers]` <chr> ~
$ `[Nerds]` <chr> ~
$ `[Nestle Crunch]` <chr> ~
$ `[Now'n'Laters]` <chr> ~
$ `[Peeps]` <chr> ~
$ `[Pencils]` <chr> ~
$ `[Person of Interest Season 3 DVD Box Set (not including Disc 4 with hilarious outtakes)]` <chr> ~
$ `[Pixy Stix]` <chr> ~
$ `[Reese’s Peanut Butter Cups]` <chr> ~
$ `[Reese's Pieces]` <chr> ~
$ `[Reggie Jackson Bar]` <chr> ~
$ `[Rolos]` <chr> ~
$ `[Skittles]` <chr> ~
$ `[Smarties (American)]` <chr> ~
$ `[Smarties (Commonwealth)]` <chr> ~
$ `[Snickers]` <chr> ~
$ `[Sourpatch Kids (i.e. abominations of nature)]` <chr> ~
$ `[Spotted Dick]` <chr> ~
$ `[Starburst]` <chr> ~
$ `[Sweet Tarts]` <chr> ~
$ `[Swedish Fish]` <chr> ~
$ `[Sweetums (a friend to diabetes)]` <chr> ~
$ `[Tic Tacs]` <chr> ~
$ `[Those odd marshmallow circus peanut things]` <chr> ~
$ `[Three Musketeers]` <chr> ~
$ `[Tolberone something or other]` <chr> ~
$ `[Trail Mix]` <chr> ~
$ `[Twix]` <chr> ~
$ `[Vials of pure high fructose corn syrup, for main-lining into your vein]` <chr> ~
$ `[Vicodin]` <chr> ~
$ `[Whatchamacallit Bars]` <chr> ~
$ `[White Bread]` <chr> ~
$ `[Whole Wheat anything]` <chr> ~
$ `[York Peppermint Patties]` <chr> ~
$ `Please list any items not included above that give you JOY.` <chr> ~
$ `Please list any items not included above that give you DESPAIR.` <chr> ~
$ `Please leave any witty, snarky or thoughtful remarks or comments regarding your choices.` <chr> ~
$ `Guess the number of mints in my hand.` <chr> ~
$ `Betty or Veronica?` <chr> ~
$ `"That dress* that went viral a few years back - when I first saw it, it was ________"` <chr> ~
$ `What is your favourite font?` <chr> ~
$ `Please estimate the degree(s) of separation you have from the following celebrities [JK Rowling]` <chr> ~
$ `Please estimate the degree(s) of separation you have from the following celebrities [JJ Abrams]` <chr> ~
$ `Please estimate the degree(s) of separation you have from the following celebrities [Beyoncé]` <chr> ~
$ `Please estimate the degree(s) of separation you have from the following celebrities [Bieber]` <chr> ~
$ `Please estimate the degree(s) of separation you have from the following celebrities [Kevin Bacon]` <chr> ~
$ `Please estimate the degree(s) of separation you have from the following celebrities [Francis Bacon (1561 - 1626)]` <chr> ~
$ `Which day do you prefer, Friday or Sunday?` <chr> ~
$ `Do you eat apples the correct way, East to West (side to side) or do you eat them like a freak of nature, South to North (bottom to top)?` <chr> ~
$ `When you see the above image of the 4 different websites, which one would you most likely check out (please be honest).` <chr> ~
$ `[York Peppermint Patties] Ignore` <lgl> ~
# Column-by-column overview of the raw 2017 survey.
candy_2017 %>% glimpse()
Rows: 2,460
Columns: 120
$ `Internal ID` <dbl> 90258773, ~
$ `Q1: GOING OUT?` <chr> NA, "No", ~
$ `Q2: GENDER` <chr> NA, "Male"~
$ `Q3: AGE` <chr> NA, "44", ~
$ `Q4: COUNTRY` <chr> NA, "USA",~
$ `Q5: STATE, PROVINCE, COUNTY, ETC` <chr> NA, "NM", ~
$ `Q6 | 100 Grand Bar` <chr> NA, "MEH",~
$ `Q6 | Anonymous brown globs that come in black and orange wrappers\t(a.k.a. Mary Janes)` <chr> NA, "DESPA~
$ `Q6 | Any full-sized candy bar` <chr> NA, "JOY",~
$ `Q6 | Black Jacks` <chr> NA, "MEH",~
$ `Q6 | Bonkers (the candy)` <chr> NA, "DESPA~
$ `Q6 | Bonkers (the board game)` <chr> NA, "DESPA~
$ `Q6 | Bottle Caps` <chr> NA, "DESPA~
$ `Q6 | Box'o'Raisins` <chr> NA, "DESPA~
$ `Q6 | Broken glow stick` <chr> NA, "DESPA~
$ `Q6 | Butterfinger` <chr> NA, "DESPA~
$ `Q6 | Cadbury Creme Eggs` <chr> NA, "MEH",~
$ `Q6 | Candy Corn` <chr> NA, "MEH",~
$ `Q6 | Candy that is clearly just the stuff given out for free at restaurants` <chr> NA, "DESPA~
$ `Q6 | Caramellos` <chr> NA, "MEH",~
$ `Q6 | Cash, or other forms of legal tender` <chr> NA, "JOY",~
$ `Q6 | Chardonnay` <chr> NA, "MEH",~
$ `Q6 | Chick-o-Sticks (we don’t know what that is)` <chr> NA, "DESPA~
$ `Q6 | Chiclets` <chr> NA, "DESPA~
$ `Q6 | Coffee Crisp` <chr> NA, "DESPA~
$ `Q6 | Creepy Religious comics/Chick Tracts` <chr> NA, "DESPA~
$ `Q6 | Dental paraphenalia` <chr> NA, "DESPA~
$ `Q6 | Dots` <chr> NA, "MEH",~
$ `Q6 | Dove Bars` <chr> NA, "JOY",~
$ `Q6 | Fuzzy Peaches` <chr> NA, "DESPA~
$ `Q6 | Generic Brand Acetaminophen` <chr> NA, "DESPA~
$ `Q6 | Glow sticks` <chr> NA, "DESPA~
$ `Q6 | Goo Goo Clusters` <chr> NA, "DESPA~
$ `Q6 | Good N' Plenty` <chr> NA, "MEH",~
$ `Q6 | Gum from baseball cards` <chr> NA, "DESPA~
$ `Q6 | Gummy Bears straight up` <chr> NA, "MEH",~
$ `Q6 | Hard Candy` <chr> NA, "MEH",~
$ `Q6 | Healthy Fruit` <chr> NA, "DESPA~
$ `Q6 | Heath Bar` <chr> NA, "MEH",~
$ `Q6 | Hershey's Dark Chocolate` <chr> NA, "JOY",~
$ `Q6 | Hershey’s Milk Chocolate` <chr> NA, "JOY",~
$ `Q6 | Hershey's Kisses` <chr> NA, "MEH",~
$ `Q6 | Hugs (actual physical hugs)` <chr> NA, "DESPA~
$ `Q6 | Jolly Rancher (bad flavor)` <chr> NA, "DESPA~
$ `Q6 | Jolly Ranchers (good flavor)` <chr> NA, "MEH",~
$ `Q6 | JoyJoy (Mit Iodine!)` <chr> NA, "DESPA~
$ `Q6 | Junior Mints` <chr> NA, "DESPA~
$ `Q6 | Senior Mints` <chr> NA, "DESPA~
$ `Q6 | Kale smoothie` <chr> NA, "DESPA~
$ `Q6 | Kinder Happy Hippo` <chr> NA, "DESPA~
$ `Q6 | Kit Kat` <chr> NA, "JOY",~
$ `Q6 | LaffyTaffy` <chr> NA, "DESPA~
$ `Q6 | LemonHeads` <chr> NA, "MEH",~
$ `Q6 | Licorice (not black)` <chr> NA, "MEH",~
$ `Q6 | Licorice (yes black)` <chr> NA, "JOY",~
$ `Q6 | Lindt Truffle` <chr> NA, "MEH",~
$ `Q6 | Lollipops` <chr> NA, "DESPA~
$ `Q6 | Mars` <chr> NA, "DESPA~
$ `Q6 | Maynards` <chr> NA, "DESPA~
$ `Q6 | Mike and Ike` <chr> NA, "MEH",~
$ `Q6 | Milk Duds` <chr> NA, "MEH",~
$ `Q6 | Milky Way` <chr> NA, "JOY",~
$ `Q6 | Regular M&Ms` <chr> NA, "JOY",~
$ `Q6 | Peanut M&M’s` <chr> NA, "MEH",~
$ `Q6 | Blue M&M's` <chr> NA, "JOY",~
$ `Q6 | Red M&M's` <chr> NA, "JOY",~
$ `Q6 | Green Party M&M's` <chr> NA, "JOY",~
$ `Q6 | Independent M&M's` <chr> NA, "JOY",~
$ `Q6 | Abstained from M&M'ing.` <chr> NA, "DESPA~
$ `Q6 | Minibags of chips` <chr> NA, "DESPA~
$ `Q6 | Mint Kisses` <chr> NA, "MEH",~
$ `Q6 | Mint Juleps` <chr> NA, "DESPA~
$ `Q6 | Mr. Goodbar` <chr> NA, "DESPA~
$ `Q6 | Necco Wafers` <chr> NA, "DESPA~
$ `Q6 | Nerds` <chr> NA, "DESPA~
$ `Q6 | Nestle Crunch` <chr> NA, "JOY",~
$ `Q6 | Now'n'Laters` <chr> NA, "DESPA~
$ `Q6 | Peeps` <chr> NA, "DESPA~
$ `Q6 | Pencils` <chr> NA, "DESPA~
$ `Q6 | Pixy Stix` <chr> NA, "DESPA~
$ `Q6 | Real Housewives of Orange County Season 9 Blue-Ray` <chr> NA, "DESPA~
$ `Q6 | Reese’s Peanut Butter Cups` <chr> NA, "JOY",~
$ `Q6 | Reese's Pieces` <chr> NA, "JOY",~
$ `Q6 | Reggie Jackson Bar` <chr> NA, "DESPA~
$ `Q6 | Rolos` <chr> NA, "JOY",~
$ `Q6 | Sandwich-sized bags filled with BooBerry Crunch` <chr> NA, "DESPA~
$ `Q6 | Skittles` <chr> NA, "DESPA~
$ `Q6 | Smarties (American)` <chr> NA, "DESPA~
$ `Q6 | Smarties (Commonwealth)` <chr> NA, "DESPA~
$ `Q6 | Snickers` <chr> NA, "MEH",~
$ `Q6 | Sourpatch Kids (i.e. abominations of nature)` <chr> NA, "DESPA~
$ `Q6 | Spotted Dick` <chr> NA, "DESPA~
$ `Q6 | Starburst` <chr> NA, "MEH",~
$ `Q6 | Sweet Tarts` <chr> NA, "DESPA~
$ `Q6 | Swedish Fish` <chr> NA, "MEH",~
$ `Q6 | Sweetums (a friend to diabetes)` <chr> NA, "DESPA~
$ `Q6 | Take 5` <chr> NA, "DESPA~
$ `Q6 | Tic Tacs` <chr> NA, "DESPA~
$ `Q6 | Those odd marshmallow circus peanut things` <chr> NA, "DESPA~
$ `Q6 | Three Musketeers` <chr> NA, "JOY",~
$ `Q6 | Tolberone something or other` <chr> NA, "JOY",~
$ `Q6 | Trail Mix` <chr> NA, "DESPA~
$ `Q6 | Twix` <chr> NA, "JOY",~
$ `Q6 | Vials of pure high fructose corn syrup, for main-lining into your vein` <chr> NA, "DESPA~
$ `Q6 | Vicodin` <chr> NA, "DESPA~
$ `Q6 | Whatchamacallit Bars` <chr> NA, "DESPA~
$ `Q6 | White Bread` <chr> NA, "DESPA~
$ `Q6 | Whole Wheat anything` <chr> NA, "DESPA~
$ `Q6 | York Peppermint Patties` <chr> NA, "DESPA~
$ `Q7: JOY OTHER` <chr> NA, "Mound~
$ `Q8: DESPAIR OTHER` <chr> NA, NA, NA~
$ `Q9: OTHER COMMENTS` <chr> NA, "Botto~
$ `Q10: DRESS` <chr> NA, "White~
$ ...114 <chr> NA, NA, NA~
$ `Q11: DAY` <chr> NA, "Sunda~
$ `Q12: MEDIA [Daily Dish]` <dbl> NA, NA, NA~
$ `Q12: MEDIA [Science]` <dbl> NA, 1, NA,~
$ `Q12: MEDIA [ESPN]` <dbl> NA, NA, NA~
$ `Q12: MEDIA [Yahoo]` <dbl> NA, NA, NA~
$ `Click Coordinates (x, y)` <chr> NA, "(84, ~
library(janitor)
Attaching package: ‘janitor’
The following objects are masked from ‘package:stats’:
chisq.test, fisher.test
# Standardise the column names of each year with janitor::clean_names().
# The raw data frames are left untouched; each cleaned copy is printed for
# inspection.
janitor_candy_2015 <- candy_2015 %>%
  janitor::clean_names()
janitor_candy_2015

janitor_candy_2016 <- candy_2016 %>%
  janitor::clean_names()
janitor_candy_2016

janitor_candy_2017 <- candy_2017 %>%
  janitor::clean_names()
janitor_candy_2017
Just looking at who actually answered the columns selected below…
# 2017: the two extra M&M's columns, shown next to age and gender.
select(
  janitor_candy_2017,
  q3_age,
  q2_gender,
  q6_independent_m_ms,
  q6_green_party_m_ms
)

# 2016: the "ignore" duplicate of the York Peppermint Patties column.
select(janitor_candy_2016, york_peppermint_patties_ignore)
# nobody
2015 clean - remove columns that aren’t candy (see readme). I went from bottom to top so I could check the column indices as I went. Also adding a “year” column and removing “timestamp” for the join (see if I can work out extracting the year from the timestamp and moving it over later). Doing this after the other removals so that the column indices aren’t thrown off.
# List the cleaned 2015 column names together with their positions.
janitor_candy_2015 %>% names()
[1] "timestamp"
[2] "how_old_are_you"
[3] "are_you_going_actually_going_trick_or_treating_yourself"
[4] "butterfinger"
[5] "x100_grand_bar"
[6] "anonymous_brown_globs_that_come_in_black_and_orange_wrappers"
[7] "any_full_sized_candy_bar"
[8] "black_jacks"
[9] "bonkers"
[10] "bottle_caps"
[11] "box_o_raisins"
[12] "brach_products_not_including_candy_corn"
[13] "bubble_gum"
[14] "cadbury_creme_eggs"
[15] "candy_corn"
[16] "vials_of_pure_high_fructose_corn_syrup_for_main_lining_into_your_vein"
[17] "candy_that_is_clearly_just_the_stuff_given_out_for_free_at_restaurants"
[18] "cash_or_other_forms_of_legal_tender"
[19] "chiclets"
[20] "caramellos"
[21] "snickers"
[22] "dark_chocolate_hershey"
[23] "dental_paraphenalia"
[24] "dots"
[25] "fuzzy_peaches"
[26] "generic_brand_acetaminophen"
[27] "glow_sticks"
[28] "broken_glow_stick"
[29] "goo_goo_clusters"
[30] "good_n_plenty"
[31] "gum_from_baseball_cards"
[32] "gummy_bears_straight_up"
[33] "creepy_religious_comics_chick_tracts"
[34] "healthy_fruit"
[35] "heath_bar"
[36] "hershey_s_kissables"
[37] "hershey_s_milk_chocolate"
[38] "hugs_actual_physical_hugs"
[39] "jolly_rancher_bad_flavor"
[40] "jolly_ranchers_good_flavor"
[41] "kale_smoothie"
[42] "kinder_happy_hippo"
[43] "kit_kat"
[44] "hard_candy"
[45] "lapel_pins"
[46] "lemon_heads"
[47] "licorice"
[48] "licorice_not_black"
[49] "lindt_truffle"
[50] "lollipops"
[51] "mars"
[52] "mary_janes"
[53] "maynards"
[54] "milk_duds"
[55] "laffy_taffy"
[56] "minibags_of_chips"
[57] "joy_joy_mit_iodine"
[58] "reggie_jackson_bar"
[59] "pixy_stix"
[60] "nerds"
[61] "nestle_crunch"
[62] "nown_laters"
[63] "pencils"
[64] "milky_way"
[65] "reese_s_peanut_butter_cups"
[66] "tolberone_something_or_other"
[67] "runts"
[68] "junior_mints"
[69] "senior_mints"
[70] "mint_kisses"
[71] "mint_juleps"
[72] "mint_leaves"
[73] "peanut_m_m_s"
[74] "regular_m_ms"
[75] "mint_m_ms"
[76] "ribbon_candy"
[77] "rolos"
[78] "skittles"
[79] "smarties_american"
[80] "smarties_commonwealth"
[81] "chick_o_sticks_we_don_t_know_what_that_is"
[82] "spotted_dick"
[83] "starburst"
[84] "swedish_fish"
[85] "sweetums"
[86] "those_odd_marshmallow_circus_peanut_things"
[87] "three_musketeers"
[88] "peterson_brand_sidewalk_chalk"
[89] "peanut_butter_bars"
[90] "peanut_butter_jars"
[91] "trail_mix"
[92] "twix"
[93] "vicodin"
[94] "white_bread"
[95] "whole_wheat_anything"
[96] "york_peppermint_patties"
[97] "please_leave_any_remarks_or_comments_regarding_your_choices"
[98] "please_list_any_items_not_included_above_that_give_you_joy"
[99] "please_list_any_items_not_included_above_that_give_you_despair"
[100] "guess_the_number_of_mints_in_my_hand"
[101] "betty_or_veronica"
[102] "check_all_that_apply_i_cried_tears_of_sadness_at_the_end_of"
[103] "that_dress_that_went_viral_early_this_year_when_i_first_saw_it_it_was"
[104] "fill_in_the_blank_taylor_swift_is_a_force_for"
[105] "what_is_your_favourite_font"
[106] "if_you_squint_really_hard_the_words_intelligent_design_would_look_like"
[107] "fill_in_the_blank_imitation_is_a_form_of"
[108] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_jk_rowling"
[109] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_jj_abrams"
[110] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_beyonce"
[111] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_bieber"
[112] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_kevin_bacon"
[113] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_francis_bacon_1561_1626"
[114] "sea_salt_flavored_stuff_probably_chocolate_since_this_is_the_it_flavor_of_the_year"
[115] "necco_wafers"
[116] "which_day_do_you_prefer_friday_or_sunday"
[117] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_bruce_lee"
[118] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_jk_rowling"
[119] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_malala_yousafzai"
[120] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_thom_yorke"
[121] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_jj_abrams"
[122] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_hillary_clinton"
[123] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_donald_trump"
[124] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_beyonce_knowles"
# 2015: drop every column that is not a candy rating (see readme), then tag the
# rows with their survey year so the years can be combined later.
#
# Columns are dropped by NAME rather than by position. A positional index such
# as 97:113 silently removes the wrong columns if the spreadsheet layout ever
# changes, whereas a missing name fails loudly. The set of dropped columns is
# exactly the one the positional version removed.
col_removed_candy_2015 <- janitor_candy_2015 %>%
  select(
    # Free-text and trivia questions (no candy column starts with these
    # prefixes).
    -starts_with("please_"),
    -starts_with("fill_in_the_blank_"),
    -c(
      guess_the_number_of_mints_in_my_hand,
      betty_or_veronica,
      check_all_that_apply_i_cried_tears_of_sadness_at_the_end_of,
      that_dress_that_went_viral_early_this_year_when_i_first_saw_it_it_was,
      what_is_your_favourite_font,
      if_you_squint_really_hard_the_words_intelligent_design_would_look_like,
      which_day_do_you_prefer_friday_or_sunday
    ),
    # Items that are not candy.
    -c(
      vicodin,
      white_bread,
      whole_wheat_anything,
      peanut_butter_jars,
      trail_mix,
      peterson_brand_sidewalk_chalk,
      spotted_dick,
      mint_leaves,
      joy_joy_mit_iodine,
      minibags_of_chips,
      lapel_pins,
      kale_smoothie,
      hugs_actual_physical_hugs,
      heath_bar,
      healthy_fruit,
      creepy_religious_comics_chick_tracts,
      broken_glow_stick,
      glow_sticks,
      generic_brand_acetaminophen,
      dental_paraphenalia,
      cash_or_other_forms_of_legal_tender,
      vials_of_pure_high_fructose_corn_syrup_for_main_lining_into_your_vein,
      box_o_raisins
    ),
    # Replaced by the constant `year` column added below.
    -timestamp
  ) %>%
  add_column(year = "2015", .before = 1)

col_removed_candy_2015
#view(col_removed_candy_2015)
2015 clean - renaming columns so they match the other years. I considered merging anonymous brown globs with mary janes (in 2017 it is “anon brown… aka Mary Janes”), but looking at the responses, they don’t match up, so I’ll leave it as anonymous_black_and_orange_wrapper.
# Compare the "anonymous brown globs" and "mary janes" answers side by side.
select(
  col_removed_candy_2015,
  anonymous_brown_globs_that_come_in_black_and_orange_wrappers,
  mary_janes
)

# Give the 2015 columns the shared short names used for all years
# (new_name = old_name).
candy_2015_renamed <- rename(
  col_removed_candy_2015,
  age = how_old_are_you,
  trick_or_treating = are_you_going_actually_going_trick_or_treating_yourself,
  anonymous_black_and_orange_wrapper = anonymous_brown_globs_that_come_in_black_and_orange_wrappers,
  brach_not_including_candy_corn = brach_products_not_including_candy_corn,
  restaurant_candy = candy_that_is_clearly_just_the_stuff_given_out_for_free_at_restaurants,
  hersheys_dark_chocolate = dark_chocolate_hershey,
  gummy_bears = gummy_bears_straight_up,
  hersheys_kissables = hershey_s_kissables,
  hersheys_milk_chocolate = hershey_s_milk_chocolate,
  licorice_black = licorice,
  reeses_peanut_butter_cups = reese_s_peanut_butter_cups,
  toblerone = tolberone_something_or_other,
  peanut_m_ms = peanut_m_m_s,
  chick_o_stick = chick_o_sticks_we_don_t_know_what_that_is,
  circus_peanuts = those_odd_marshmallow_circus_peanut_things,
  sea_salt_chocolate = sea_salt_flavored_stuff_probably_chocolate_since_this_is_the_it_flavor_of_the_year
)
2016 clean - remove unnecessary columns, working from bottom to top. Also adding a year column for the binding of rows.
2016 - rename columns so they match the other sheets. I considered merging anonymous brown globs with mary janes (in 2017 it is “anon brown… aka Mary Janes”), but looking at the responses, they don’t match up, so I’ll leave it as anonymous_black_and_orange_wrapper.
# Compare the "anonymous brown globs" and "mary janes" answers side by side.
select(
  col_removed_candy_2016,
  anonymous_brown_globs_that_come_in_black_and_orange_wrappers,
  mary_janes
)

# Give the 2016 columns the shared short names used for all years
# (new_name = old_name).
candy_2016_renamed <- rename(
  col_removed_candy_2016,
  trick_or_treating = are_you_going_actually_going_trick_or_treating_yourself,
  gender = your_gender,
  age = how_old_are_you,
  country = which_country_do_you_live_in,
  state_or_prov = which_state_province_county_do_you_live_in,
  anonymous_black_and_orange_wrapper = anonymous_brown_globs_that_come_in_black_and_orange_wrappers,
  bonkers = bonkers_the_candy,
  restaurant_candy = candy_that_is_clearly_just_the_stuff_given_out_for_free_at_restaurants,
  chick_o_stick = chick_o_sticks_we_don_t_know_what_that_is,
  gummy_bears = gummy_bears_straight_up,
  hersheys_milk_chocolate = hershey_s_milk_chocolate,
  licorice_black = licorice_yes_black,
  peanut_m_ms = peanut_m_m_s,
  party_bag_m_ms = third_party_m_ms,
  reeses_peanut_butter_cups = reese_s_peanut_butter_cups,
  sourpatch_kids = sourpatch_kids_i_e_abominations_of_nature,
  sweetarts = sweet_tarts,
  sweetums = sweetums_a_friend_to_diabetes,
  circus_peanuts = those_odd_marshmallow_circus_peanut_things,
  toblerone = tolberone_something_or_other
)

candy_2016_renamed
NA
2017 clean - remove the columns that aren’t candy, plus internal id, as the other years don’t have it. Also adding a year column for the join.
# 2017: drop the respondent id and every column that is not a candy rating,
# then tag the rows with their survey year so the years can be combined later.
#
# Columns are dropped by NAME rather than by position, so a change in the
# spreadsheet layout fails loudly instead of silently removing the wrong
# columns. The set of dropped columns is the same as with the positional
# indices 102, 104, 105, 107, 108 and 110:120.
col_removed_candy_2017 <- janitor_candy_2017 %>%
  select(
    # Everything from the first free-text question (Q7) to the last column.
    # These are the trailing survey questions, not candy ratings.
    -(q7_joy_other:last_col()),
    # Items that are not candy.
    -c(
      q6_trail_mix,
      q6_vials_of_pure_high_fructose_corn_syrup_for_main_lining_into_your_vein,
      q6_vicodin,
      q6_white_bread,
      q6_whole_wheat_anything,
      q6_spotted_dick,
      q6_sandwich_sized_bags_filled_with_boo_berry_crunch,
      q6_real_housewives_of_orange_county_season_9_blue_ray,
      q6_minibags_of_chips,
      q6_abstained_from_m_ming,
      q6_kale_smoothie,
      q6_joy_joy_mit_iodine,
      q6_hugs_actual_physical_hugs,
      q6_heath_bar,
      q6_healthy_fruit,
      q6_glow_sticks,
      q6_generic_brand_acetaminophen,
      q6_dental_paraphenalia,
      q6_creepy_religious_comics_chick_tracts,
      q6_chardonnay,
      q6_cash_or_other_forms_of_legal_tender,
      q6_broken_glow_stick,
      q6_boxo_raisins,
      q6_bonkers_the_board_game
    ),
    # The other years have no respondent id.
    -internal_id
  ) %>%
  add_column(year = "2017", .before = 1)

col_removed_candy_2017
2017 - rename - get rid of q1/2/3/4/5/6 at the start of col names and rename to match 2015 and 16
# Strip the leading question tag ("q1_" ... "q6_") from every 2017 column name.
# The regex removes any two lower-case letters/digits followed by "_" at the
# start of a name, so "year" is left unchanged.
#
# rename_with() hands the current names to the function as `.x`. This replaces
# the superseded rename_all(), whose lambda ignored its argument and read the
# names from the outer data frame instead. make.names() is dropped because it
# is a no-op here: janitor::clean_names() already produced syntactic names.
# Note that "q6_100_grand_bar" becomes the non-syntactic `100_grand_bar`, which
# needs backticks when referenced.
candy_2017_q_removed <- col_removed_candy_2017 %>%
  rename_with(~ sub("^[a-z0-9]{2}_", "", .x))
# Give the 2017 columns the shared short names used for all years
# (new_name = old_name).
candy_2017_renamed <- rename(
  candy_2017_q_removed,
  trick_or_treating = going_out,
  state_or_prov = state_province_county_etc,
  x100_grand_bar = `100_grand_bar`,
  mary_janes = anonymous_brown_globs_that_come_in_black_and_orange_wrappers_a_k_a_mary_janes,
  bonkers = bonkers_the_candy,
  restaurant_candy = candy_that_is_clearly_just_the_stuff_given_out_for_free_at_restaurants,
  chick_o_stick = chick_o_sticks_we_don_t_know_what_that_is,
  gummy_bears = gummy_bears_straight_up,
  hersheys_milk_chocolate = hershey_s_milk_chocolate,
  licorice_black = licorice_yes_black,
  peanut_m_ms = peanut_m_m_s,
  green_m_ms = green_party_m_ms,
  lone_m_ms = independent_m_ms,
  reeses_peanut_butter_cups = reese_s_peanut_butter_cups,
  sourpatch_kids = sourpatch_kids_i_e_abominations_of_nature,
  sweetarts = sweet_tarts,
  sweetums = sweetums_a_friend_to_diabetes,
  circus_peanuts = those_odd_marshmallow_circus_peanut_things,
  toblerone = tolberone_something_or_other
)

candy_2017_renamed
# Open each renamed data frame in the data viewer for a manual check.
# NOTE(review): view() is an interactive helper; presumably these calls are
# only meant for ad-hoc use while cleaning. Confirm before knitting or
# sourcing this file.
view(candy_2015_renamed)
view(candy_2016_renamed)
view(candy_2017_renamed)
Getting an idea of people’s responses:
# Distinct answers per question, year by year, to see what needs cleaning.

# 2015
candy_2015_renamed %>% distinct(age) # (chr...) 146 responses, some silly and some strange
candy_2015_renamed %>% distinct(trick_or_treating) # yes or no (chr)
candy_2015_renamed %>% distinct(starburst)

# 2016
candy_2016_renamed %>% distinct(age) # chr 98 incl silly/strange
candy_2016_renamed %>% distinct(trick_or_treating) # Yes No (chr)
candy_2016_renamed %>% distinct(gender) # Male, Female, Other, I'd rather not say, NA
candy_2016_renamed %>% distinct(country) # 93 some silly, some e.g. USA, US, us, u.s.a. etc
candy_2016_renamed %>% distinct(starburst)

# 2017
candy_2017_renamed %>% distinct(age) # chr 107 incl silly/strange
candy_2017_renamed %>% distinct(trick_or_treating) # Yes No and NA (chr)
candy_2017_renamed %>% distinct(gender) # Male, Female, Other, I'd rather not say, NA
candy_2017_renamed %>% distinct(country) # 118 some silly, some e.g. USA, US, us, u.s.a. etc
candy_2017_renamed %>% distinct(starburst)
Join all three
---- COUNTRY CLEANING ---- Note that 2015 (5630 rows of 9349) has no country data. The 2015 survey seems to be American, so maybe change its NAs to USA at a later date (??).
# Distinct spellings of country across the combined data.
distinct(bound_candy, country)
# 169 country variations

# Rows with no country recorded.
filter(bound_candy, is.na(country))
# 5715 rows in country are NA
# Normalise the spellings of "USA" at the start of the 2016 country answers to
# "America".
#
# Fixes relative to the first attempt:
#   * "{1, }" is not a valid interval in ICU regex (no space is allowed inside
#     the braces) and raised U_REGEX_BAD_INTERVAL. One-or-more is written "+".
#   * "[!][+]" matched a "!" followed by literal "+" characters. The intent
#     appears to be "USA followed by exclamation marks", i.e. "!+".
#     NOTE(review): confirm against the raw answers.
#   * Order matters. The bare "USA" rule must run LAST; if it runs first, the
#     value no longer starts with "USA" and the more specific rules can never
#     match, leaving e.g. "America!!!" behind.
country_2016 <- candy_2016_renamed %>%
  mutate(
    # "USA!", "usa!!!!", ...
    country = str_replace_all(country, pattern = "^[Uu]+[Ss]+[Aa]+!+", "America"),
    # "U.S.A.", "u.s.a.", ...
    country = str_replace_all(country, pattern = "^[uU]+\\.+[sS]+\\.+[aA]+\\.", "America"),
    # plain "USA", "usa", "Usa", ...
    country = str_replace_all(country, pattern = "^[uU]+[sS]+[aA]+", "America")
    # Alternative covering any trailing punctuation. A POSIX class has to sit
    # inside a bracket expression, hence "[[:punct:]]":
    # country = str_replace_all(country, pattern = "^[uU]+[sS]+[aA]+[[:punct:]]+", "America")
  )
Error in `mutate()`:
! Problem while computing `country = str_replace_all(...)`.
Caused by error in `stri_replace_all_regex()`:
! Error in {min,max} interval. (U_REGEX_BAD_INTERVAL, context=`^[Uu]+[Ss]+[Aa]+[!][+]{1, }`)
Backtrace:
1. candy_2016_renamed %>% ...
7. stringr::str_replace_all(...)
8. stringi::stri_replace_all_regex(...)
Reference pattern for cleaning columns with across(): mutate(across(col_name, .fns = ~ str_remove_all(.x, "JUV|AD|IMM|SUB|PL*|[0-9]|"), .names = "fixed_{.col}"))
AGE QUESTION
#bound_candy %>%
# distinct(age)
#274 different ages (chr)
#bound_candy <- bound_candy %>%
# mutate(age = as.double(age))
#need to remove all non-numbers and NAs first
#age_focus <- bound_candy %>%
# select(age) %>%
#filter(age < "90") %>%
#arrange(desc(age))#
#view(age_focus) # still have 0.62, 142, 1e+18 etc in there...
library(stringr)
# Extract the four-digit survey year from the 2015 timestamp.
#
# * The pattern has to be a digit class repeated four times, "[0-9]{4}". In the
#   rendered notebook the bracket expression had been turned into a footnote,
#   leaving "1{4}", which would only match "1111".
# * `timestamp` has already been dropped from col_removed_candy_2015, so it is
#   read from janitor_candy_2015 instead.
# * str_extract() works on a character vector, not a data frame, so the column
#   is pulled out and converted to text first.
year_pattern <- "[0-9]{4}"

janitor_candy_2015 %>%
  pull(timestamp) %>%
  as.character() %>%
  str_extract(year_pattern)